
clear all
set more off

** Change path to re-run on another computer
cd ""
**

use "Energy_prices_imputed(index)", clear

*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*** 0) Dermine which prices to use as base
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
***OIL
bysort country: egen count_hs=count(oil_hs_consUS) if year>=1995
bysort country: egen count_ls=count(oil_ls_consUS) if year>=1995

gen hs=1 if count_hs>= count_ls & count_hs!=. & count_ls!=.
replace hs=0 if hs==. & year>=1995  & count_hs!=. & count_ls!=.


tab country hs 

bysort country: egen hs2=max(hs)
tab country hs2

replace oil_hs_consUS=oil_ls_consUS if hs2==0
replace oil_hs_PPPcons=oil_ls_PPPcons if hs2==0

drop oil_ls* hs hs2 count_*

***COAL
bysort country: egen count_hs=count(coal_steam_consUS) if year>=1995
bysort country: egen count_ls=count(coal_coke_consUS) if year>=1995

gen hs=1 if count_hs>= count_ls & count_hs!=. & count_ls!=.
replace hs=0 if hs==. & year>=1995  & count_hs!=. & count_ls!=.


tab country hs 

bysort country: egen hs2=max(hs)
tab country hs2

replace coal_steam_consUS=coal_coke_consUS if hs2==0
replace coal_steam_PPPcons=coal_coke_PPPcons if hs2==0
 
drop coal_coke* hs hs2 count_*


keep isoalpha2code OECD isonum country year electricity_consUS oil_hs_consUS gas_consUS coal_steam_consUS ///
 rex_coal rex_electricity rex_gas rex_oil_heavy rex_totenergy ///
 wi_coal wi_electricity wi_naturalgas wi_oilproducts ///
 growth_OECDr_oil growth_OECDr_gas growth_OECDr_coal growth_OECDr_electricity flag
 // rex_oil_tot contains automotive
 sort isonum year
 
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*** 1) Use growth rate from RESPECTIVE (i.e. gas, oil..) real price index for OECD countries 
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$


local rex "rex_oil_heavy rex_electricity rex_gas rex_coal" // rex_oil_tot

foreach x of local rex{
	gen growth_`x'=(`x'-L.`x')/L.`x'
}

rename growth_rex_oil_heavy  growth_rex_oil


//only take what will use:

local fuel "oil_hs_consUS gas_consUS coal_steam_consUS electricity_consUS "

foreach x of local fuel{
	g `x'_respindex_L=`x'
	g `x'_respindex_F=`x'
}

rename oil_hs_consUS_respindex* oil_consUS_respindex*
rename coal_steam_consUS_respindex* coal_consUS_respindex*


local fuel "oil gas coal electricity"

foreach x of local fuel{
	forvalues i = 1/60 {
		replace `x'_consUS_respindex_L=L.`x'_consUS_respindex_L*(1+growth_rex_`x') if `x'_consUS_respindex_L ==.
	}
	forvalues i = 1/60 {
		replace `x'_consUS_respindex_F=F.`x'_consUS_respindex_F/(1+F.growth_rex_`x') if `x'_consUS_respindex_F ==.
	}
	*now take mean
	egen `x'_consUS_respindex=rowmean(`x'_consUS_respindex_L `x'_consUS_respindex_F)
	label var `x'_consUS_respindex "Price in cons USD (missing imputed with respective real price index)"

}

*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*** 2) Use growth rate from real RESPECTIVE WHOLESALE price index (i.e. gas, oil..) for NON-OECD countries MAINLY, but some for coal, and one or two obs for gas.
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

local wi "wi_oilproducts wi_electricity wi_naturalgas wi_coal"

foreach x of local wi{
	gen growth_`x'=(`x'-L.`x')/L.`x'
}

rename (growth_wi_oilproducts growth_wi_naturalgas)  (growth_wi_oil  growth_wi_gas)


*check that no wi where we have rex
list cou year if rex_oil_heavy==. & wi_oil!=. & OECD==1 & oil_hs_consUS==. & year>1995
list cou year if rex_coal==. & wi_coal!=. & OECD==1 & coal_steam_consUS==. & year>1995
// quite a few, the rest there are none
list cou year if rex_gas==. & wi_naturalgas!=. & OECD==1 & gas_consUS==. & year>1995
list cou year if rex_electricity==. & wi_elec!=. & OECD==1 & electricity_consUS==. & year>1995

sum *_consUS_respindex if OECD==1
/*

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
oil_consUS~x |      1,064    436.1582    231.0488          0   1372.747
gas_consUS~x |        985    436.0062     236.063   52.14067   2243.482
coal_consU~x |        733    199.4236    124.0223          0    1273.79
electricit~x |      1,175    1318.699    515.5642   463.0721   3619.523
*/

local fuel "oil gas coal electricity"


foreach x of local fuel{
	forvalues i = 1/60 {
		replace `x'_consUS_respindex_L=L.`x'_consUS_respindex_L*(1+growth_wi_`x') if `x'_consUS_respindex_L ==. // & OECD==0
	}
	forvalues i = 1/60 {
		replace `x'_consUS_respindex_F=F.`x'_consUS_respindex_F/(1+F.growth_wi_`x') if `x'_consUS_respindex_F ==. // & OECD==0
	}
	*now take mean
	egen `x'_consUS_respindext=rowmean(`x'_consUS_respindex_L `x'_consUS_respindex_F)
	
	replace `x'_consUS_respindex=`x'_consUS_respindext if  `x'_consUS_respindex==. // OECD==0 &

}

drop *respindex_F *respindex_L *respindext

sum *_consUS_respindex if OECD==1
/*
    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
oil_consUS~x |      1,064    436.1582    231.0488          0   1372.747
gas_consUS~x |        987    436.4684    236.2437   52.14067   2243.482
coal_consU~x |        780    197.0646    121.9124          0    1273.79
electricit~x |      1,175    1318.699    515.5642   463.0721   3619.52
*/

*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
* 3) Create resptot by applying the growht rate from total index to the already imputed respind
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
gen growth_rex=(rex_totenergy-L.rex_totenergy)/L.rex_totenergy

local fuel "oil gas coal electricity"

foreach x of local fuel{
	g `x'_consUS_resptot_L=`x'_consUS_respindex
	g `x'_consUS_resptot_F=`x'_consUS_respindex
	}

foreach x of local fuel{
	forvalues i = 1/60 {
		replace `x'_consUS_resptot_L =L.`x'_consUS_resptot_L*(1+growth_rex) if `x'_consUS_resptot_L ==.
	}
	forvalues i = 1/60 {
		replace `x'_consUS_resptot_F =F.`x'_consUS_resptot_F/(1+F.growth_rex) if `x'_consUS_resptot_F==.
	}
	egen `x'_consUS_resptotindex=rowmean(`x'_consUS_resptot_L `x'_consUS_resptot_F )
	label var `x'_consUS_resptotindex "Price in cons USD (missing imputed with respective and total real price index)"
}


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
* 4) China: use datapoint for gas in 1997 and extent with an average of wholesale oil, electricity and coal until 2004 and then use the gas wholesale index
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$


gen avg_growth_temp = (growth_wi_oil + growth_wi_electricity + growth_wi_coal)/3 if country=="China"
*need to spare respindex for below, so rename after
g gas_consUS_respindex_before=gas_consUS_respindex

forvalues i = 1/20 {
		replace gas_consUS_respindex =F.gas_consUS_respindex/(1+F.avg_growth_temp) if gas_consUS_respindex ==. & country=="China" 
		replace gas_consUS_resptotindex =F.gas_consUS_resptotindex/(1+F.avg_growth_temp) if gas_consUS_resptotindex ==. & country=="China" 

	}

forvalues i = 1/7 {
	replace gas_consUS_respindex=L.gas_consUS_respindex*(1+avg_growth_temp) if gas_consUS_respindex ==. & country=="China" & year<2005
	replace gas_consUS_resptotindex =L.gas_consUS_resptotindex*(1+avg_growth_temp) if gas_consUS_resptotindex ==. & country=="China" & year<2005

}

forvalues i = 1/20 {
	replace gas_consUS_respindex=L.gas_consUS_respindex*(1+growth_wi_gas) if gas_consUS_respindex ==. & country=="China" & year>=2005
	replace gas_consUS_resptotindex =L.gas_consUS_resptotindex*(1+growth_wi_gas) if gas_consUS_resptotindex ==. & country=="China" & year>=2005
}


drop avg_growth_temp

rename gas_consUS_respindex gas_consUS_respindex_wCh
rename gas_consUS_respindex_before gas_consUS_respindex

rename *resptotindex *restotOLD
drop *resptot_F *resptot_L


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
*** 5) Create alternative imputation by applying the growht rate from OECD regional index 
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

local fuel "oil gas coal electricity" // not electricity

foreach x of local fuel{
	g `x'_consUS_resptot_L=`x'_consUS_respindex
	g `x'_consUS_resptot_F=`x'_consUS_respindex
	}




foreach x of local fuel{
	forvalues i = 1/60 {
		replace `x'_consUS_resptot_L =L.`x'_consUS_resptot_L*(1+growth_OECDr_`x') if `x'_consUS_resptot_L ==.
	}
	forvalues i = 1/60 {
		replace `x'_consUS_resptot_F =F.`x'_consUS_resptot_F/(1+F.growth_OECDr_`x') if `x'_consUS_resptot_F==.
	}
	egen `x'_consUS_resptotindex=rowmean(`x'_consUS_resptot_L `x'_consUS_resptot_F )
	label var `x'_consUS_resptotindex "Price in cons USD (missing imputed with respective and total real price index)"
}



drop *resptot_F *resptot_L



*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
** 7) Use imputation that performs better by country from VEPL LOOCV exercise
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

***BY country and by fuel take index with lower RMSE
preserve
use  "Energy_prices_imputed_RMSE_resp", clear
// take resp as basis

drop *oecdb* *oecd2*
foreach x of varlist rmse*{
*replace `x'=round(`x',0.1)
}

foreach fuel in oil gas coal electricity{
g usetote_`fuel'=.
replace usetote_`fuel'=1 if rmse_tote_c_`fuel'<rmse_oecd_c_`fuel' & rmse_tote_c_`fuel'!=. & rmse_oecd_c_`fuel'!=.
replace usetote_`fuel'=0 if rmse_tote_c_`fuel'>rmse_oecd_c_`fuel' & rmse_tote_c_`fuel'!=. & rmse_oecd_c_`fuel'!=.
}
keep cou usetote_*
tempfile indchoice
save `indchoice'

restore


* Merge totindex choice back in 
***************************
merge m:1 country using `indchoice'
drop _m

*1) for countries where choose totindex:
***************************
**first check how many post 1994
preserve
keep if year>1994
sort cou year
foreach x in oil gas coal electricity{
foreach y in consUS {
di "`x'_`y'"
list cou year `x'_`y'_respindex  `x'_`y'_restotOLD `x'_`y'_resptotindex if  `x'_`y'_resptotindex!=`x'_`y'_restotOLD   & usetote_`x'==1 
replace `x'_`y'_resptotindex=`x'_`y'_restotOLD  if usetote_`x'==1
*check if for nonoecd
assert `x'_`y'_restotOLD==. if `x'_`y'_respindex==. & OECD==0 & cou!="China"
}
}
restore

 foreach x in oil gas coal electricity{
foreach y in consUS {
di "`x'_`y'"
list cou year `x'_`y'_respindex  `x'_`y'_restotOLD `x'_`y'_resptotindex if  `x'_`y'_resptotindex!=`x'_`y'_restotOLD   & usetote_`x'==1 
replace `x'_`y'_resptotindex=`x'_`y'_restotOLD  if usetote_`x'==1
*check if for nonoecd
assert `x'_`y'_restotOLD==. if `x'_`y'_respindex==. & OECD==0 & cou!="China"
}
}


*2) for non-OECD non EU:
***************************
g OECD2=OECD
replace OECD2=1 if cou=="Bulgaria" | cou=="Croatia" | cou=="Cyprus" | cou=="Latvia" | cou=="Lithuania" | cou=="Malta" | cou=="Romania" 


***for non OECD replace resptot with resptot old, so all to missing. 

foreach x in oil gas coal electricity{
foreach y in consUS {
replace `x'_`y'_resptotindex=`x'_`y'_restotOLD if OECD2==0
}
}
* for gas also china replaced:
sum gas_consUS_respindex_wCh  gas_consUS_respindex 
replace gas_consUS_respindex=gas_consUS_respindex_wCh if cou=="China"

drop OECD2

keep isonum isoalpha2code country year flag electricity_consUS oil_hs_consUS gas_consUS coal_steam_consUS oil_consUS_respindex coal_consUS_respindex electricity_consUS_respindex gas_consUS_respindex oil_consUS_resptotindex gas_consUS_resptotindex coal_consUS_resptotindex electricity_consUS_resptotindex
rename (oil_hs_consUS coal_steam_consUS) (oil_consUS coal_consUS)

*only keep the resptot index but create flag based on cou year
g flag_fepi_price=.

g temp=0
g temp2=0
g temp3=0
foreach fuel in oil gas coal electricity {
replace temp=temp+`fuel'_consUS_resptotindex
replace temp2=temp2+`fuel'_consUS_respindex
replace temp3=temp3+`fuel'_consUS
}
sum temp*

replace flag_fepi_price=-1 if temp==. // all missing
replace flag_fepi_price=2 if temp!=. // the resptot
replace flag_fepi_price=1 if temp2!=. // all resp
replace flag_fepi_price=0 if temp3!=. // obs
tab flag_fepi_price
tab flag_fepi_price flag_VEPL if year>=1995
drop temp*

label var flag_fepi_price "-1=missing, 0=all obs., 1=resp. index imp., 2=agg index imp."
rename flag_fepi_price flag_FEPI

drop electricity_consUS oil_consUS gas_consUS coal_consUS oil_consUS_respindex coal_consUS_respindex electricity_consUS_respindex gas_consUS_respindex

foreach fuel in oil gas coal electricity {
g double `fuel'_log_imputed=ln(`fuel'_consUS_resptotindex) 
}

drop *resptot* flag_VEPL

save "Energy_prices_imputed(index)_forFEPI", replace

